{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "PB5MNwqhemiu",
   "metadata": {
    "id": "PB5MNwqhemiu",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%pip uninstall -y sentence_transformers\n",
    "%pip install sentence_transformers==3.1.1 -q --user\n",
    "\n",
    "# Restart the kernel after installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "IiFfv1pCIzn3KaWkZ9whQHB3",
   "metadata": {
    "executionInfo": {
     "elapsed": 5233,
     "status": "ok",
     "timestamp": 1715337400431,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "IiFfv1pCIzn3KaWkZ9whQHB3",
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/jovyan/.local/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:13: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
      "  from tqdm.autonotebook import tqdm, trange\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sentence_transformers import SentenceTransformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cMkBO9Zrh8Yp",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 2180,
     "status": "ok",
     "timestamp": 1715337403525,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "cMkBO9Zrh8Yp",
    "outputId": "1adb1afc-2d33-43f5-9e78-96be864e26f1"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c30b09c16b9460aa57a819b1f583991",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model = SentenceTransformer('paraphrase-MiniLM-L3-v2') # try more powerful model: all-mpnet-base-v2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "G9UX2FUneDWq",
   "metadata": {
    "executionInfo": {
     "elapsed": 144,
     "status": "ok",
     "timestamp": 1715337404534,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "G9UX2FUneDWq"
   },
   "outputs": [],
   "source": [
    "#Sentences we want to encode. Example:\n",
    "sentence = [\n",
    "    'This blanket has such a cozy temperature for me!',\n",
    "    'I am so much warmer and snug using this spread!',\n",
    "    'Taylor Swift was 34 years old in 2024.']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1rD-yeh7eGl1",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 199,
     "status": "ok",
     "timestamp": 1715338234822,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "1rD-yeh7eGl1",
    "outputId": "dea6be9c-dad1-4a22-c2bd-93172dec2794"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.42288575  0.11801947  0.5898906  ... -0.07997336  0.16128041\n",
      "   0.15290566]\n",
      " [ 0.08642215 -0.01953113  0.3944788  ... -0.19359088  0.2726452\n",
      "   0.23386717]\n",
      " [ 0.02534086  0.47614327  0.11544228 ... -0.1039189   0.58169025\n",
      "  -0.7600805 ]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(3, 384)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Sentences are encoded by calling model.encode()\n",
    "embedding = model.encode(sentence)\n",
    "\n",
    "#Preview the embeddings\n",
    "print(embedding)\n",
    "embedding.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "wcfgqw4NjjMg",
   "metadata": {
    "executionInfo": {
     "elapsed": 215,
     "status": "ok",
     "timestamp": 1715338237387,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "wcfgqw4NjjMg"
   },
   "outputs": [],
   "source": [
    "# Euclidean Distance function\n",
    "def euclidean_distance(vec1, vec2):\n",
    "    return np.linalg.norm(vec1 - vec2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8YRl-XeTjTqB",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 231,
     "status": "ok",
     "timestamp": 1715338238253,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "8YRl-XeTjTqB",
    "outputId": "91262f53-c98f-482b-d6b6-e949714afd24"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Euclidean Distance: Review 1 vs Review 2: 4.6202908\n",
      "Euclidean Distance: Review 1 vs Random Comment: 7.3135476\n",
      "Euclidean Distance: Review 2 vs Random Comment: 6.3389034\n"
     ]
    }
   ],
   "source": [
    "# Euclidean Distance\n",
    "print(\"Euclidean Distance: Review 1 vs Review 2:\", euclidean_distance(embedding[0], embedding[1]))\n",
    "print(\"Euclidean Distance: Review 1 vs Random Comment:\", euclidean_distance(embedding[0], embedding[2]))\n",
    "print(\"Euclidean Distance: Review 2 vs Random Comment:\", euclidean_distance(embedding[1], embedding[2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "X8GFCtAwePEb",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 203,
     "status": "ok",
     "timestamp": 1715338240507,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "X8GFCtAwePEb",
    "outputId": "ca1038a2-68fd-48a9-d429-462a1f5a29c4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dot Product: Review 1 vs Review 2: 12.270499\n",
      "Dot Product: Review 1 vs Random Comment: -0.7654622\n",
      "Dot Product: Review 2 vs Random Comment: 0.9524107\n"
     ]
    }
   ],
   "source": [
    "# Dot Product\n",
    "print(\"Dot Product: Review 1 vs Review 2:\", np.dot(embedding[0], embedding[1]))\n",
    "print(\"Dot Product: Review 1 vs Random Comment:\", np.dot(embedding[0], embedding[2]))\n",
    "print(\"Dot Product: Review 2 vs Random Comment:\", np.dot(embedding[1], embedding[2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "MlUlzG9afLjM",
   "metadata": {
    "executionInfo": {
     "elapsed": 147,
     "status": "ok",
     "timestamp": 1715338242502,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "MlUlzG9afLjM"
   },
   "outputs": [],
   "source": [
    "# Cosine Distance function\n",
    "def cosine_distance(vec1,vec2):\n",
    "  cosine = 1 - abs((np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))))\n",
    "  return cosine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "sikwgBLPeQU8",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 153,
     "status": "ok",
     "timestamp": 1715338243571,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "sikwgBLPeQU8",
    "outputId": "f52eb3ba-1302-430c-be41-bce1edad97b2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine Distance: Review 1 vs Review 2: 0.4523801803588867\n",
      "Cosine Distance: Review 1 vs Random Comment: 0.9704556185752153\n",
      "Cosine Distance: Review 2 vs Random Comment: 0.9542623050510883\n"
     ]
    }
   ],
   "source": [
    "# Cosine Distance\n",
    "print(\"Cosine Distance: Review 1 vs Review 2:\", cosine_distance(embedding[0], embedding[1]))\n",
    "print(\"Cosine Distance: Review 1 vs Random Comment:\", cosine_distance(embedding[0], embedding[2]))\n",
    "print(\"Cosine Distance: Review 2 vs Random Comment:\", cosine_distance(embedding[1], embedding[2]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a736f722-8ed7-466e-9d0d-118beb4745d2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "CHAPTER8-DISTANCEMETRICS.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
